Prepare the data

This data is STDIF structure - a class for unstructured spatio-temporal data, with n spatial locations and times, where n observations are available, from the spacetime package.

The 0s actually reflect that there were no observations at that time - in other words, they are not false absences, and should not be removed.

## class       : SpatialPoints 
## features    : 1 
## extent      : 1189877, 1189877, 459784, 459784  (xmin, xmax, ymin, ymax)
## crs         : +proj=lcc +lat_0=44 +lon_0=-70 +lat_1=50 +lat_2=46 +x_0=800000 +y_0=0 +datum=NAD83 +units=m +no_defs
##      timeIndex
## [1,]         1
## [2,]         2
## [3,]         3
## [4,]         4
## [5,]         5
## [6,]         6
## [1] "1971-09-07 08:25:00 ADT" "1971-09-07 11:15:00 ADT"
## [3] "1971-09-07 12:45:00 ADT" "1971-09-07 15:05:00 ADT"
## [5] "1971-09-08 08:25:00 ADT" "1971-09-08 10:25:00 ADT"

Data exploration

White hake abundances

Through time

plot(number@endTime, number@data$`WHITE HAKE`, type = "l")

In space

plot(number@sp, pch = 19, cex = number@data$`WHITE HAKE`/300)
plot(gulf, add = TRUE)

Explanatory variables

# function to add coordinates to dataset in preparation for conversion to sf
add_coords <- function(stidf_object){
  # assign datasets to objects to simplify the code a bit
  df <- stidf_object@data 
  coords <- stidf_object@sp@coords

  # add coordinate columns to dataset
  df <- mutate(df,
           longitude = coords[,1],
           latitude = coords[,2],
           time = lubridate::date(stidf_object@endTime),
           year = lubridate::year(stidf_object@endTime)
           )
  return(df)
}

# convert datasets to sf objects

explan_sf <- add_coords(explan) %>%
  st_as_sf(x = ., coords = c("longitude", "latitude"))
st_crs(explan_sf) <- explan@sp@proj4string # set coordinate ref system

gulf_sf <- st_as_sf(gulf)
st_crs(gulf_sf) <- gulf@proj4string # set coordinate ref system

# function ot make ggplot map of explanatory variable
explan_map <- function(variable, legend_title){
  ggplot() +
  geom_sf(data = gulf_sf, fill = "white") +
  geom_sf(data = filter(explan_sf, 
                        # pick a sequence of years to check out
                        year %in% c(1971, 1981, 1992, 2001, 2011, 2019)),
          aes(col = get(variable))) +
  facet_wrap(~year) +
  scale_color_viridis_c(option = "viridis") +
  labs(color = legend_title) +
  theme_void()
}

# maps
p1 <- explan_map("bottom.temperature", "Bottom \ntemperature")
p2 <- explan_map("bottom.salinity", "Bottom \nsalinity")
p3 <- explan_map("depth", "Depth")
p4 <- explan_map("slope", "Slope")
# patchwork together!
(p1 + p2) / (p3 + p4)

Step 1 - Building the meshes

This mesh size reflects the range of coordinates in space. Changing to a smaller value, like 35000, gives a more appropriate mesh.

#_____________________
## Reorganise the data
#_____________________
### Space
xy <- coordinates(explan)
## Loading required package: spacetime
colnames(xy) <- c("locx", "locy")

### Time
time <- as.numeric(explan@endTime)/60 # Number of minutes since Jan 1, 1970 
timeBasis <- time - 884844 # Time starting at 1

#______________
## Build meshes
#______________
### Temporal mesh
timeGr <- 11
timeSeq <- seq(min(timeBasis),
               max(timeBasis),
               length = timeGr)

meshTime <- inla.mesh.1d(timeSeq)

### Spatial mesh
maxEdge <- 35000
meshSpace <- inla.mesh.2d(boundary = gulf, 
                          max.edge=maxEdge * c(0.5,2),
                          cutoff=maxEdge/4,
                          offset = c(10000,20000))

# plot points under the mesh
par(mar = c(1,1,1,1))
plot(number@sp, pch = 19, cex = 0.2, col = "red")
plot(gulf, add = TRUE)
plot(meshSpace, main = "", asp = 1, add = TRUE)

# plot abundances under the mesh too
plot(number@sp, pch = 19, cex = number@data$`WHITE HAKE`/300, col = "red")
plot(gulf, add = TRUE)
plot(meshSpace, main = "", asp = 1, add = TRUE)

# Number of edges in the mesh
meshSpace$n
## [1] 866

How many estimations will be carried out?

meshSpace$n * meshTime$n
## [1] 9526

Step 2 - Define the stochastic partial differential equation

## [1] 34.63971

Step 3 - Priors and hyperparameters for temporal autocorrelation

# Temporal autocorrelation prior
hSpec <- list(theta=list(prior='pccor1', param=c(0.2, 0.9))) 
# assuming not a lot of temporal autocorrelation (0.2) and we are fairly sure of that (0.9)

# Precision likelihood 
## (negative binomial or any distribution that has more than one parameter)
precPrior <- list(prior='pc.prec', param=c(1, 0.01))
# guessing it's 1 with a confidence of .01, which is super low.

# note ^ is not useful if using a Poisson.

Step 4 - Index matrix

Field <- inla.spde.make.index("field", 
                              n.spde = SPDE$n.spde,
                              n.group = meshTime$n)

Step 5 - A matrix

# For estimation
A <- inla.spde.make.A(meshSpace,
                      loc = coordinates(number@sp),
                      group = timeBasis,
                      group.mesh = meshTime)

Step 6 - Organise the A matrix into a list

Alist <- as.list(rep(1,5)) # list with single values
Alist[[1]] <- A

Step 7 - Organise the effects (spatial autocorrelation structure and explanatory variables)

Here, we’re using variables about the bottom conditions, because White hake is a groundfish.

effect <- list(Field,
               bTemp =  explan@data$bottom.temperature,
               bSal = explan@data$bottom.salinity,
               bDep = explan@data$depth,
               strat = explan@data$stratum)

Step 8 - Build stack

Stack <- inla.stack(data=list(whitehake = number@data$`WHITE HAKE`),
                    A = Alist,
                    effects = effect,
                    tag="basis")

Step 9 - Building the model (Finally!)

Model selection

# build candidate models --- only run with two time points. otherwise, eternal
form_0 <- whitehake ~ 0 +
  f(field, model=SPDE, group = field.group, 
    control.group=list(model='ar1', hyper=hSpec)) 
form_1 <- whitehake ~ 0 + bTemp +
  f(field, model=SPDE, group = field.group, 
    control.group=list(model='ar1', hyper=hSpec)) 
form_2 <- whitehake ~ 0 + bSal +
  f(field, model=SPDE, group = field.group, 
    control.group=list(model='ar1', hyper=hSpec)) 
form_3 <- whitehake ~ 0 + bDep +
  f(field, model=SPDE, group = field.group, 
    control.group=list(model='ar1', hyper=hSpec)) 
form_4 <- whitehake ~ 0 + bTemp + bSal +
  f(field, model=SPDE, group = field.group, 
    control.group=list(model='ar1', hyper=hSpec)) 
form_5 <- whitehake ~ 0 + bTemp + bDep +
  f(field, model=SPDE, group = field.group, 
    control.group=list(model='ar1', hyper=hSpec)) 
form_6 <- whitehake ~ 0 + bSal + bDep +
  f(field, model=SPDE, group = field.group, 
    control.group=list(model='ar1', hyper=hSpec)) 
form_7 <- whitehake ~ 0 + bTemp + bSal + bDep +
  f(field, model=SPDE, group = field.group, # how they're grouped
    control.group=list(model='ar1', hyper=hSpec)) # build with model parameters
forms_ls <- list(form_0, form_1, form_2, form_3, form_4, form_5, form_6, form_7)

# evaluate each model
model_ls <- vector("list", length = length(forms_ls))
for(i in 1:length(model_ls)){
  model_ls[[i]] <- inla(forms_ls[[i]],
                        data = inla.stack.data(Stack),
                        family="gaussian",
                        control.family = list(link="identity"),
                        control.predictor = list(A = inla.stack.A(Stack), 
                                                 compute = TRUE, 
                                                 link = 1),
                        control.compute = list(waic = TRUE, config = TRUE))
}

# extract waic to compare
waic_df <- data.frame(model = 1:7, waic = NA)
for(i in 1:nrow(waic_df)){
  waic_df[i,2] <- model_ls[[i]]$waic$waic
}
# find lowest waic (it's model #4)
waic_df[which(waic_df$waic == min(waic_df$waic)),]

#saveRDS(model, "outputs/whitehake_model.RDS")

Build chosen model

form <- whitehake ~ 0 + bTemp + bSal +
  f(field, # temporal autocorr
    model=SPDE, # spatial autocorr structure
    group = field.group, # how they're grouped
    control.group=list(model='ar1', hyper=hSpec) # build with model parameters
  ) +
  f(
    strat,
    model = "iid",
    hyper = list(theta = list(prior = "gaussian",
                              param = c(0, 0.001)))
  )

model <- inla(form,
              data = inla.stack.data(Stack),
              family="nbinomial",
              control.family = list(link="log", hyper = list(theta=precPrior)),
              control.predictor = list(A = inla.stack.A(Stack), 
                                       compute = TRUE, 
                                       link = 1),
              control.compute = list(waic = TRUE,
                                     config = TRUE))

Checking and interpreting the model

summary(model)
## 
## Call:
##    c("inla(formula = form, family = \"nbinomial\", data = 
##    inla.stack.data(Stack), ", " control.compute = list(waic = TRUE, config 
##    = TRUE), control.predictor = list(A = inla.stack.A(Stack), ", " compute 
##    = TRUE, link = 1), control.family = list(link = \"log\", ", " hyper = 
##    list(theta = precPrior)))") 
## Time used:
##     Pre = 4.1, Running = 8474, Post = 5.59, Total = 8484 
## Fixed effects:
##         mean    sd 0.025quant 0.5quant 0.975quant   mode kld
## bTemp  0.163 0.012      0.138    0.163      0.188  0.163   0
## bSal  -0.006 0.003     -0.012   -0.006      0.000 -0.006   0
## 
## Random effects:
##   Name     Model
##     field SPDE2 model
##    strat IID model
## 
## Model hyperparameters:
##                                                            mean       sd
## size for the nbinomial observations (1/overdispersion) 9.22e-01    0.047
## Range for field                                        7.36e+04 8566.676
## Stdev for field                                        3.24e+00    0.314
## GroupRho for field                                     9.18e-01    0.019
## Precision for strat                                    3.02e-01    0.131
##                                                        0.025quant 0.5quant
## size for the nbinomial observations (1/overdispersion)   8.32e-01 9.21e-01
## Range for field                                          5.85e+04 7.30e+04
## Stdev for field                                          2.67e+00 3.22e+00
## GroupRho for field                                       8.75e-01 9.20e-01
## Precision for strat                                      1.24e-01 2.76e-01
##                                                        0.975quant     mode
## size for the nbinomial observations (1/overdispersion)   1.02e+00 9.19e-01
## Range for field                                          9.22e+04 7.17e+04
## Stdev for field                                          3.91e+00 3.17e+00
## GroupRho for field                                       9.51e-01 9.24e-01
## Precision for strat                                      6.29e-01 2.32e-01
## 
## Expected number of effective parameters(stdev): 559.64(27.91)
## Number of equivalent replicates : 11.77 
## 
## Watanabe-Akaike information criterion (WAIC) ...: 13961.27
## Effective number of parameters .................: 443.51
## 
## Marginal log-Likelihood:  -7363.08 
## Posterior marginals for the linear predictor and
##  the fitted values are computed
model$summary.hyperpar
##                                                                mean
## size for the nbinomial observations (1/overdispersion) 9.217642e-01
## Range for field                                        7.362193e+04
## Stdev for field                                        3.237178e+00
## GroupRho for field                                     9.182521e-01
## Precision for strat                                    3.016166e-01
##                                                                  sd
## size for the nbinomial observations (1/overdispersion) 4.692327e-02
## Range for field                                        8.566676e+03
## Stdev for field                                        3.140363e-01
## GroupRho for field                                     1.945179e-02
## Precision for strat                                    1.308805e-01
##                                                          0.025quant
## size for the nbinomial observations (1/overdispersion) 8.323237e-01
## Range for field                                        5.851544e+04
## Stdev for field                                        2.674446e+00
## GroupRho for field                                     8.748938e-01
## Precision for strat                                    1.238795e-01
##                                                            0.5quant
## size for the nbinomial observations (1/overdispersion) 9.207976e-01
## Range for field                                        7.301220e+04
## Stdev for field                                        3.217883e+00
## GroupRho for field                                     9.200821e-01
## Precision for strat                                    2.759619e-01
##                                                          0.975quant
## size for the nbinomial observations (1/overdispersion) 1.017055e+00
## Range for field                                        9.216896e+04
## Stdev for field                                        3.908579e+00
## GroupRho for field                                     9.508876e-01
## Precision for strat                                    6.289251e-01
##                                                                mode
## size for the nbinomial observations (1/overdispersion) 9.192038e-01
## Range for field                                        7.169535e+04
## Stdev for field                                        3.175286e+00
## GroupRho for field                                     9.235507e-01
## Precision for strat                                    2.320843e-01
Efxplot(list(model)) + theme_classic()
## Loading required package: MCMCglmm
## Loading required package: coda
## Loading required package: ape
## 
## Attaching package: 'ape'
## The following objects are masked from 'package:raster':
## 
##     rotate, zoom

# from Coding Club tutorial: "NB: There are no P values in INLA. Importance or significance of variables can be deduced by examining the overlap of their 2.5% and 97.5% posterior estimates with zero."
ExpY <- model$summary.fitted.values[1:nrow(number@data),"mean"]

#For the variance we also need the posterior mean value of θ, which is obtained via
phi1 <- model$summary.hyper[1, "mean"]

#The variance is then given by
VarY <- ExpY * (1 - ExpY) / (1 + phi1)

# And once we have the mean and the variance we can calculate Pearson residuals.
E1 <- (number@data$`WHITE HAKE` - ExpY) / sqrt(VarY)
## Warning in sqrt(VarY): NaNs produced
# Apply model validation
# Figure 23.11
par(mfrow = c(1,1), mar = c(5,5,2,2), cex.lab = 1.5)
plot(x = model$summary.fitted.values$mean[1:nrow(number@data)], 
     y = E1,
     xlab = "Fitted values",
     ylab = "Pearson residuals",
     xlim = c(0, 1))
abline(h = 0, lty = 2)

# note: there's a value at 130...

Fixed effects

This is the output you would get from a normal linear model. - mode: mode of the parameters distribution - kld: ????

Random effects

Field group is the random effect we set earlier.

DF

Distribution for the parameter picked with mean, sd, and credible interval

Predictions through space and time

# Dimension of the raster
stepsize <- 1000 # choose cell size: 1000m x 1000m
rangeX <- range(meshSpace$loc[,1])
rangeY <- range(meshSpace$loc[,2])
nxy <- round(c(diff(rangeX), 
               diff(rangeY)) / stepsize)

# Define basis of the map
mapBasis <- inla.mesh.projector(meshSpace,
                               xlim = rangeX,
                               ylim = rangeY,
                               crs = crs(gulf))


# Calculate prediction
mapMean <- vector("list", length = timeGr)
map.025 <- vector("list", length = timeGr)
map.975 <- vector("list", length = timeGr)

# for every time point
for(i in 1:timeGr){
  # Model prediction
  fitMean <- inla.mesh.project(mapBasis, 
                          model$summary.random$field$mean[1:SPDE$n.spde + (i - 1) * SPDE$n.spde])
  fit.025 <- inla.mesh.project(mapBasis, 
                               model$summary.random$field$`0.025quant`[1:SPDE$n.spde + (i - 1) * SPDE$n.spde])
  fit.975 <- inla.mesh.project(mapBasis, 
                               model$summary.random$field$`0.975quant`[1:SPDE$n.spde + (i - 1) * SPDE$n.spde])

  # Build maps with confidence intervals
  mapMean[[i]] <- raster(t(fitMean[,ncol(fitMean):1]),
                         xmn = min(mapBasis$x), xmx = max(mapBasis$x), 
                         ymn = min(mapBasis$y), ymx = max(mapBasis$y),
                         crs = crs(gulf))

  map.025[[i]] <- raster(t(fit.025[,ncol(fit.025):1]),
                         xmn = min(mapBasis$x), xmx = max(mapBasis$x), 
                         ymn = min(mapBasis$y), ymx = max(mapBasis$y),
                         crs = crs(gulf))
  
  map.975[[i]] <- raster(t(fit.975[,ncol(fit.975):1]),
                         xmn = min(mapBasis$x), xmx = max(mapBasis$x), 
                         ymn = min(mapBasis$y), ymx = max(mapBasis$y),
                         crs = crs(gulf))
}  

# Convert to a rasterStack (aligns the elements of a list)
rasterMean <- stack(mapMean)
raster.025 <- stack(map.025)
raster.975 <- stack(map.975)
  # to check this visually: 
  # image(mapMean[[1]])
  # plot(meshSpace, add = TRUE)

# reorganise
for(i in 1:5){
  values(rasterMean)[,i] <- as.vector(t(mapMean[[i]]))
  values(raster.025)[,i] <- as.vector(t(map.025[[i]]))
  values(raster.975)[,i] <- as.vector(t(map.975[[i]]))
}

# Mask the region to keep only the region of interest
rasterMeanMask <- mask(rasterMean, gulf)
raster.025Mask <- mask(raster.025, gulf)
raster.975Mask <- mask(raster.975, gulf)
# image(rasterMeanMask)
# plot(gulf, add = TRUE)
# Map the results
par(mfrow = c(2,3), mar = c(1,1,3,1))

for(i in 1:timeGr){
zlimRange <- range(values(raster.025Mask[[i]]),
                   values(rasterMeanMask[[i]]),
                   values(raster.975Mask[[i]]), na.rm = TRUE)
}

# convert to data frames for ggplot
raster.025Mask_df <- as.data.frame(raster.025Mask, xy = TRUE, na.rm = TRUE) %>%
  pivot_longer(cols = 3:ncol(.)) %>%
  mutate(quant = "Lower (0.025)")

rasterMeanMask_df <- as.data.frame(rasterMeanMask, xy = TRUE, na.rm = TRUE) %>%
  pivot_longer(cols = 3:ncol(.)) %>%
  mutate(quant = "Mean")

raster.975Mask_df <- as.data.frame(raster.975Mask, xy = TRUE, na.rm = TRUE) %>%
  pivot_longer(cols = 3:ncol(.)) %>%
  mutate(quant = "Higher (0.975)")

# bind together
raster_df <- rbind(raster.025Mask_df, rasterMeanMask_df, raster.975Mask_df)
raster_df$quant <- factor(raster_df$quant, levels = c("Lower (0.025)", "Mean", "Higher (0.975)"))

# make this more automatized later (label with years) 
raster_df$name <- factor(raster_df$name, levels = c("layer.1",
                                                    "layer.2",
                                                    "layer.3",
                                                    "layer.4",
                                                    "layer.5",
                                                    "layer.6",
                                                    "layer.7",
                                                    "layer.8", 
                                                    "layer.9", 
                                                    "layer.10", 
                                                    "layer.11"))

p <- ggplot() +
    geom_raster(data = filter(raster_df, quant == "Mean"), 
                aes(x = x, y = y, fill = value)) + 
    labs(fill = "Abundance") +
    scale_fill_distiller(palette = "RdBu", 
                         limits = c(-max(abs(raster_df$value)), 
                                    max(abs(raster_df$value)))) +
    #facet_wrap(~quant) +
    theme_void() +
  gganimate::transition_states(name, transition_length=.6, ) +
  labs(title = 'Time group: {closest_state}')
p

# save!
# gganimate::anim_save("predictions.gif", animation = p, path = "figures/")

Each time step:

p2 <- ggplot() +
    geom_raster(data = raster_df, 
                aes(x = x, y = y, fill = value)) + 
    labs(fill = "Abundance") +
    scale_fill_distiller(palette = "RdBu", 
                         limits = c(-max(abs(raster_df$value)), 
                                    max(abs(raster_df$value)))) +
    facet_grid(name~quant) +
    theme_void() 
p2